import docx
import os

def read_doc_file(file_path):
    """Extract the text of a Word document as one newline-joined string.

    Paragraph texts are collected first, followed by the text of every
    table cell, visited table by table and row by row. python-docx reports
    a merged cell once for each grid position it spans, so every underlying
    cell is emitted only once per table to avoid repeated text.

    Args:
        file_path: Path of the document to read.

    Returns:
        The extracted text, or None when the file does not exist or could
        not be read. A message is printed in either failure case.
    """
    try:
        # Make sure the file exists before handing it to python-docx.
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            return None

        doc = docx.Document(file_path)
        text = [para.text for para in doc.paragraphs]
        for table in doc.tables:
            # Underlying cell elements already emitted for this table; the
            # elements themselves are stored so they stay alive and their
            # identity remains a reliable key.
            seen = set()
            for row in table.rows:
                for cell in row.cells:
                    key = getattr(cell, "_tc", cell)
                    if key in seen:
                        # Same merged cell reached through another grid slot.
                        continue
                    seen.add(key)
                    text.append(cell.text)
        return '\n'.join(text)
    except Exception as e:
        # Best-effort boundary: report the problem and signal failure.
        print(f"Error reading DOC file: {e}")
        if str(file_path).lower().endswith(".doc"):
            # The library error alone does not reveal the likely cause.
            print(
                "Hint: python-docx reads .docx files only; if this is a "
                "legacy binary .doc file, convert it to .docx first."
            )
        return None

if __name__ == "__main__":
    # Script entry point: read one Word document and dump its text to a
    # UTF-8 text file next to it.
    # Raw strings keep the backslashes in the Windows paths from being
    # interpreted as escape sequences.
    input_file = r"d:\桌面\实习\git-mcp-test\project\周文俐案例数据\岭回归案例\周文俐的专业实习报告.doc"
    output_file = r"d:\桌面\实习\git-mcp-test\project\周文俐案例数据\岭回归案例\doc_content.txt"

    print(f"Attempting to read: {input_file}")
    print(f"File exists: {os.path.exists(input_file)}")

    content = read_doc_file(input_file)
    # read_doc_file signals failure with None; an empty string means the
    # document was read successfully but contains no text, which is still
    # a successful conversion.
    if content is not None:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Successfully converted DOC to TXT at {output_file}")
    else:
        print("Failed to read DOC file")